notebook.community

Edit and run



In [1]:

    
%pylab
%matplotlib inline









    



Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib



In [2]:

    
cd ..









    



/afs/inf.ed.ac.uk/user/s11/s1145806/Documents/git/neukrill-net-work



In [3]:

    
import sys
import numpy as np
import skimage
import cv2
import sklearn
import imp



In [4]:

    
from holoviews import *









    



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-4-008fb02dab24> in <module>()
----> 1 from holoviews import *

/afs/inf.ed.ac.uk/user/s11/s1145806/Documents/git/neukrill-venv-auto/lib/python2.7/site-packages/holoviews/__init__.py in <module>()
      8                             commit="$Format:%h$", reponame='holoviews')
      9 
---> 10 from .core.dimension import Dimension         # pyflakes:ignore (API import)
     11 from .core.boundingregion import BoundingBox  # pyflakes:ignore (API import)
     12 from .core.layout import *                    # pyflakes:ignore (API import)

/afs/inf.ed.ac.uk/user/s11/s1145806/Documents/git/neukrill-venv-auto/lib/python2.7/site-packages/holoviews/core/__init__.py in <module>()
      1 from .boundingregion import *  # pyflakes:ignore (API import)
----> 2 from .dimension import *       # pyflakes:ignore (API import)
      3 from .element import *         # pyflakes:ignore (API import)
      4 from .layout import *          # pyflakes:ignore (API import)
      5 from .operation import *       # pyflakes:ignore (API import)

/afs/inf.ed.ac.uk/user/s11/s1145806/Documents/git/neukrill-venv-auto/lib/python2.7/site-packages/holoviews/core/dimension.py in <module>()
     10 
     11 from ..core.util import valid_identifier
---> 12 from .options import Store
     13 from .pprint import PrettyPrinter
     14 

/afs/inf.ed.ac.uk/user/s11/s1145806/Documents/git/neukrill-venv-auto/lib/python2.7/site-packages/holoviews/core/options.py in <module>()
     66 
     67 
---> 68 class Cycle(param.Parameterized):
     69     """
     70     A simple container class that specifies cyclic options. A typical

/afs/inf.ed.ac.uk/user/s11/s1145806/Documents/git/neukrill-venv-auto/lib/python2.7/site-packages/holoviews/core/options.py in Cycle()
     78 
     79     items = param.List(default=None, allow_None=True,  doc="""
---> 80         If supplied, the explicit list of items to be cycled over.""")
     81 
     82     rckey = param.String(default='axes.color_cycle', doc="""

/afs/inf.ed.ac.uk/user/s11/s1145806/Documents/git/neukrill-venv-auto/lib/python2.7/site-packages/param/__init__.pyc in __init__(self, default, class_, instantiate, bounds, **params)
   1056         self.class_ = class_
   1057         self.bounds = bounds
-> 1058         self._check_bounds(default)
   1059         Parameter.__init__(self,default=default,instantiate=instantiate,
   1060                            **params)

/afs/inf.ed.ac.uk/user/s11/s1145806/Documents/git/neukrill-venv-auto/lib/python2.7/site-packages/param/__init__.pyc in _check_bounds(self, val)
   1074         """
   1075         if not (isinstance(val,list)):
-> 1076             raise ValueError("List '%s' must be a list."%(self._attrib_name))
   1077 
   1078         if self.bounds is not None:

AttributeError: _attrib_name



In [5]:

    
import neukrill_net.utils
import neukrill_net.highlevelfeatures



In [6]:

    
import time



In [7]:

    
settings = neukrill_net.utils.Settings('settings.json')



In [8]:

    
X,y = settings.flattened_train_paths(settings.classes)



In [9]:

    
hlf = neukrill_net.highlevelfeatures.ContourMoments()



In [10]:

    
t0 = time.time()
XF = hlf.transform(X)
print("Computing features took {}".format(time.time()-t0))



In [24]:

    
XF.shape









    Out[24]:





(1, 30336, 30)



In [26]:

    
sklearn.externals.joblib.dump((hlf,XF,y),'cache/contourmoments.pkl')









    Out[26]:





['cache/contourmoments.pkl', 'cache/contourmoments.pkl_01.npy']

Naive Bayes



In [11]:

    
import sklearn.naive_bayes



In [12]:

    
clf = sklearn.naive_bayes.GaussianNB()



In [13]:

    
t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)

t1 = time.time()
total = t1-t0
print("Time={}".format(total))

print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))









    



Time=0.0596358776093
Accuracy=0.122428797468
Logloss=21.7494153182

Logistic Regression



In [14]:

    
clf = sklearn.linear_model.LogisticRegression(random_state=42)



In [15]:

    
t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)

t1 = time.time()
total = t1-t0
print("Time={}".format(total))

print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))









    



Time=51.0716290474
Accuracy=0.295424578059
Logloss=2.83966800982

Random Forest



In [18]:

    
import sklearn.ensemble



In [19]:

    
clf = sklearn.ensemble.RandomForestClassifier(n_estimators=1000, max_depth=20, min_samples_leaf=5)

t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)

t1 = time.time()
total = t1-t0
print("Time={}".format(total))

print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))









    



Time=142.819734097
Accuracy=0.372824367089
Logloss=2.55767029422

Linear SVC



In [23]:

    
clf = sklearn.svm.SVC(kernel='linear', probability=True, random_state=42)

t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)

t1 = time.time()
total = t1-t0
print("Time={}".format(total))

print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))









    



Time=72.912648201
Accuracy=0.339135021097
Logloss=2.64244706442

Non-linear SVC

one-vs-one



In [22]:

    
clf = sklearn.svm.SVC(probability=True, random_state=42)

t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)

t1 = time.time()
total = t1-t0
print("Time={}".format(total))

print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))









    



Time=92.4036419392
Accuracy=0.294765295359
Logloss=2.71083425129

What if we add the Haralick features to this?



In [16]:

    
hlf2 = neukrill_net.highlevelfeatures.Haralick()



In [17]:

    
hlf_ = hlf+hlf2



In [18]:

    
XF_ = hlf_.transform(X)



In [19]:

    
XF_.shape









    Out[19]:





(1, 30336, 56)



In [20]:

    
t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF_.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)

t1 = time.time()
total = t1-t0
print("Time={}".format(total))

print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))









    



Time=91.0343399048
Accuracy=0.493934599156
Logloss=1.96769402153



In [ ]: